bitkeeper revision 1.1371 (42692d136tRflIX7G4VOXJLCJstFVw)
author kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Fri, 22 Apr 2005 16:57:55 +0000 (16:57 +0000)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Fri, 22 Apr 2005 16:57:55 +0000 (16:57 +0000)
Hi folks,

 arch/xen/x86_64/kernel/entry.S         |  131
+++++++++++++++++++--------------
 arch/xen/x86_64/kernel/process.c       |    5 -
 arch/xen/x86_64/kernel/traps.c         |    3
 arch/xen/x86_64/kernel/vsyscall.c      |    2
 include/asm-xen/asm-x86_64/hypercall.h |    5 -
 include/asm-xen/asm-x86_64/system.h    |    6 -
 6 files changed, 88 insertions(+), 64 deletions(-)

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
The attached patch contains bug fixes:
1. hang at floppy driver
2. hiccups at boot time
3. FP save/restore
4. cleanups in entry.S (critical section fixup, etc.)
5. time problem (temporarily disables vgettimeofday, which uses the TSC). I'm
not sure whether we can use TSC-based vxtime in Xen, so some cleanup is
needed there.

1 & 2 were simply fixed in process.c; xen_idle() is now identical to the
32-bit one.

I'm still working on failsafe_callback (because I'm now hitting it), but
the system should be ready for a broader range of people. Some
configurations seem to cause a panic in the hypervisor, and I'm looking
into that as well.

linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S
linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/process.c
linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c
linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c
linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/system.h

index 672704e2dbffa8058e5c7f85062365789b875397..31bb6600d011462d8b5562c778ca53950ee05c4c 100644 (file)
@@ -204,7 +204,7 @@ ENTRY(ret_from_fork)
        jnz rff_trace
 rff_action:    
        RESTORE_REST
-       cmpl $__KERNEL_CS,CS-ARGOFFSET(%rsp)    # from kernel_thread?
+       testl $3,CS-ARGOFFSET(%rsp)     # from kernel_thread?
        je   int_ret_from_sys_call
        testl $_TIF_IA32,threadinfo_flags(%rcx)
        jnz  int_ret_from_sys_call
@@ -268,12 +268,12 @@ ret_from_sys_call:
        /* edi: flagmask */
 sysret_check:          
        GET_THREAD_INFO(%rcx)
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_BLOCK_EVENTS(%r11)        
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_BLOCK_EVENTS(%rsi)        
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        jnz  sysret_careful 
-        XEN_UNBLOCK_EVENTS(%r11)                
+        XEN_UNBLOCK_EVENTS(%rsi)                
        RESTORE_ARGS 0,8,0
         SWITCH_TO_USER ECF_IN_SYSCALL
 
@@ -282,8 +282,8 @@ sysret_check:
 sysret_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_BLOCK_EVENTS(%r11)        
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_BLOCK_EVENTS(%rsi)        
        pushq %rdi
        call schedule
        popq  %rdi
@@ -292,8 +292,8 @@ sysret_careful:
        /* Handle a signal */ 
 sysret_signal:
 /*     sti */
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_UNBLOCK_EVENTS(%r11)        
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_UNBLOCK_EVENTS(%rsi)        
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz    1f
 
@@ -334,8 +334,8 @@ badsys:
  * Has correct top of stack, but partial stack frame.
  */    
 ENTRY(int_ret_from_sys_call)   
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_BLOCK_EVENTS(%r11)
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_BLOCK_EVENTS(%rsi)
        testb $3,CS-ARGOFFSET(%rsp)
         jnz 1f
         /* Need to set the proper %ss (not NULL) for ring 3 iretq */
@@ -358,8 +358,8 @@ int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc  int_very_careful
 /*     sti */
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_UNBLOCK_EVENTS(%r11)
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_UNBLOCK_EVENTS(%rsi)
        pushq %rdi
        call schedule
        popq %rdi
@@ -368,6 +368,8 @@ int_careful:
        /* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
 /*     sti */
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_UNBLOCK_EVENTS(%rsi)
        SAVE_REST
        /* Check for syscall exit trace */      
        testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
@@ -510,26 +512,26 @@ retint_check:
        jnz  retint_careful
 retint_restore_args:                           
        RESTORE_ARGS 0,8,0                                              
-        testb $3,8(%rsp)                # check CS
-        jnz  user_mode
+       testb $3,8(%rsp)                # check CS
+       jnz  user_mode
 kernel_mode:
         orb   $3,1*8(%rsp)
        iretq
 user_mode:
-        SWITCH_TO_USER 0                        
+       SWITCH_TO_USER 0                        
        
        /* edi: workmask, edx: work */  
 retint_careful:
        bt    $TIF_NEED_RESCHED,%edx
        jnc   retint_signal
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_UNBLOCK_EVENTS(%r11)
+       XEN_GET_VCPU_INFO(%rsi)
+       XEN_UNBLOCK_EVENTS(%rsi)
 /*     sti */        
        pushq %rdi
        call  schedule
        popq %rdi               
-        XEN_GET_VCPU_INFO(%r11)        
-        XEN_BLOCK_EVENTS(%r11)         
+       XEN_GET_VCPU_INFO(%rsi)        
+       XEN_BLOCK_EVENTS(%rsi)          
        GET_THREAD_INFO(%rcx)
 /*     cli */
        jmp retint_check
@@ -537,16 +539,16 @@ retint_careful:
 retint_signal:
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz    retint_restore_args
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_UNBLOCK_EVENTS(%r11)
+        XEN_GET_VCPU_INFO(%rsi)
+        XEN_UNBLOCK_EVENTS(%rsi)
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)                         
        xorq %rsi,%rsi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
        RESTORE_REST
-        XEN_GET_VCPU_INFO(%r11)        
-        XEN_BLOCK_EVENTS(%r11)         
+        XEN_GET_VCPU_INFO(%rsi)        
+        XEN_BLOCK_EVENTS(%rsi)         
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)   
        jmp retint_check
@@ -564,11 +566,11 @@ retint_kernel:
        jc   retint_restore_args
        movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
 /*     sti */
-        XEN_GET_VCPU_INFO(%r11)
-        XEN_BLOCK_EVENTS(%r11)
+       XEN_GET_VCPU_INFO(%rsi)
+       XEN_UNBLOCK_EVENTS(%rsi)
        call schedule
-        XEN_GET_VCPU_INFO(%r11) /* %esi can be different */
-        XEN_UNBLOCK_EVENTS(%r11)
+       XEN_GET_VCPU_INFO(%rsi) /* %esi can be different */
+       XEN_BLOCK_EVENTS(%rsi)
 /*     cli */
        GET_THREAD_INFO(%rcx)
        movl $0,threadinfo_preempt_count(%rcx) 
@@ -796,7 +798,13 @@ scrit:     /**** START OF CRITICAL REGION ****/
        jnz  14f                        # process more events if necessary...
        XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
         RESTORE_REST
-        jmp retint_restore_args
+        RESTORE_ARGS 0,8,0
+        testb $3,8(%rsp)                # check CS
+        jnz  crit_user_mode
+        orb   $3,1*8(%rsp)
+        iretq
+crit_user_mode:
+        SWITCH_TO_USER 0 
         
 14:    XEN_LOCKED_BLOCK_EVENTS(%rsi)
        XEN_UNLOCK_VCPU_INFO_SMP(%rsi)
@@ -842,32 +850,47 @@ critical_region_fixup:
        jmp   11b
 
 critical_fixup_table:
-       .word 0x0000,0x0000,0x0000,0x0000         # testb  $0xff,0x0(%rsi)
-       .word 0x0000,0x0000                       # jne    ffffffff8010daa0 14f
-       .word 0x0000,0x0000,0x0000,0x0000         # mov    (%rsp),%r15
-       .word 0x0808,0x0808,0x0808,0x0808,0x0808  # mov    0x8(%rsp),%r14
-       .word 0x1010,0x1010,0x1010,0x1010,0x1010  # mov    0x10(%rsp),%r13
-       .word 0x1818,0x1818,0x1818,0x1818,0x1818  # mov    0x18(%rsp),%r12
-       .word 0x2020,0x2020,0x2020,0x2020,0x2020  # mov    0x20(%rsp),%rbp
-       .word 0x2828,0x2828,0x2828,0x2828,0x2828  # mov    0x28(%rsp),%rbx
-       .word 0x3030,0x3030,0x3030,0x3030         # add    $0x30,%rsp
-       .word 0x0030,0x0030,0x0030,0x0030,0x0030  # testb  $0x1,0x74(%rsp)
-       .word 0x0030,0x0030,0x0030,0x0030,0x0030,0x0030 # jne    ffffffff8010d740 <user_mode>
-       .word 0x0030,0x0030,0x0030,0x0030         # mov    (%rsp),%r11
-       .word 0x0838,0x0838,0x0838,0x0838,0x0838  # mov    0x8(%rsp),%r10
-       .word 0x1040,0x1040,0x1040,0x1040,0x1040  # mov    0x10(%rsp),%r9
-       .word 0x1848,0x1848,0x1848,0x1848,0x1848  # mov    0x18(%rsp),%r8
-       .word 0x2060,0x2060,0x2060,0x2060,0x2060  # mov    0x20(%rsp),%rax
-       .word 0x2868,0x2868,0x2868,0x2868,0x2868  # mov    0x28(%rsp),%rcx
-       .word 0x3070,0x3070,0x3070,0x3070,0x3070  # mov    0x30(%rsp),%rdx
-       .word 0x3878,0x3878,0x3878,0x3878,0x3878  # mov    0x38(%rsp),%rsi
-       .word 0x4080,0x4080,0x4080,0x4080,0x4080  # mov    0x40(%rsp),%rdi
-       .word 0x4888,0x4888,0x4888,0x4888         # add    $0x50,%rsp
-       .word 0x0000,0x0000                       # iretq
-       .word 0x0000,0x0000,0x0000,0x0000         # movb   $0x1,0x1(%rsi)
-       .word 0x0000,0x0000,0x0000                # mov    %rsp,%rdi
-       .word 0x0000,0x0000,0x0000,0x0000,0x0000  # jmpq   11b
+        .byte 0x00,0x00,0x00,0x00                 # testb  $0xff,0x0(%rsi)
+        .byte 0x00,0x00,0x00,0x00,0x00,0x00       # jne    <crit_user_mode+0x42>
+        .byte 0x00,0x00,0x00,0x00                 # mov    (%rsp),%r15
+        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x8(%rsp),%r14
+        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x10(%rsp),%r13
+        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x18(%rsp),%r12
+        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x20(%rsp),%rbp
+        .byte 0x00,0x00,0x00,0x00,0x00            # mov    0x28(%rsp),%rbx
+        .byte 0x00,0x00,0x00,0x00                 # add    $0x30,%rsp
+        .byte 0x30,0x30,0x30,0x30                 # mov    (%rsp),%r11
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x8(%rsp),%r10
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x10(%rsp),%r9
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x18(%rsp),%r8
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x20(%rsp),%rax
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x28(%rsp),%rcx
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x30(%rsp),%rdx
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x38(%rsp),%rsi
+        .byte 0x30,0x30,0x30,0x30,0x30            # mov    0x40(%rsp),%rdi
+        .byte 0x30,0x30,0x30,0x30                 # add    $0x50,%rsp
+        .byte 0x80,0x80,0x80,0x80,0x80            # testb  $0x3,0x8(%rsp)
+        .byte 0x80,0x80                           # jne    ffffffff8010dc25 <crit_user_mode>
+        .byte 0x80,0x80,0x80,0x80                 # orb    $0x3,0x8(%rsp)
+        .byte 0x80,0x80                           # iretq
+                                                  # <crit_user_mode>:
+        .byte 0x80,0x80,0x80,0x80,0x80,0x80,0x80  # movq   $0x0,%gs:0x60
+        .byte 0x80,0x80,0x80,0x80,0x80
+        .byte 0x80,0x80,0x80,0x80                 # sub    $0x20,%rsp
+        .byte 0x60,0x60,0x60,0x60                 # mov    %rax,(%rsp)
+        .byte 0x60,0x60,0x60,0x60,0x60            # mov    %r11,0x8(%rsp)
+        .byte 0x60,0x60,0x60,0x60,0x60            # mov    %rcx,0x10(%rsp)
+        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x0,0x18(%rsp)
+        .byte 0x60,0x60
+        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x33,0x28(%rsp)
+        .byte 0x60,0x60
+        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # movq   $0x2b,0x40(%rsp)
+        .byte 0x60,0x60        
+        .byte 0x60,0x60,0x60,0x60,0x60,0x60,0x60  # mov    $0x17,%rax
+        .byte 0x60,0x60                           # syscall
+        .byte 0x60,0x60,0x60,0x60,0x60            # movb   $0x1,0x1(%rsi)
+        .byte 0x60,0x60,0x60                      # mov    %rsp,%rdi
+        .byte 0x60,0x60,0x60,0x60,0x60            # jmpq   <do_hypervisor_callback+0x20>
 # Hypervisor uses this for application faults while it executes.
 ENTRY(failsafe_callback)
         hlt         
index 0ae4f42ed88d86a8221abb2185595c7af0efa087..a2e3c1f54e137e935a96b052c38606c81a9c2efa 100644 (file)
@@ -87,7 +87,9 @@ EXPORT_SYMBOL(enable_hlt);
 extern int set_timeout_timer(void);
 void xen_idle(void)
 {
-       int cpu;
+       int cpu;        
+
+       local_irq_disable();
 
        cpu = smp_processor_id();
        if (rcu_pending(cpu))
@@ -102,7 +104,6 @@ void xen_idle(void)
                local_irq_enable();
                HYPERVISOR_yield();
        }
-        set_need_resched();
 }
 
 /*
index 17d6cf5c28b1739624098b2b435059425779c125..3800567bb5b67b2202756d678d72a5e14bc453e1 100644 (file)
@@ -896,7 +896,8 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 asmlinkage void math_state_restore(void)
 {
        struct task_struct *me = current;
-       clts();                 /* Allow maths ops (or we recurse) */
+        
+        /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
 
        if (!used_math())
                init_fpu(me);
index aed380ed3a8b6fbd7bf07203fa7c35aa4e958c1c..f980cdefff017f8d62b233d136c0a85615573677 100644 (file)
@@ -182,7 +182,7 @@ static int __init vsyscall_init(void)
        BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
        map_vsyscall();
         map_vsyscall_user();    /* establish tranlation for user address space */
-       sysctl_vsyscall = 1; 
+       sysctl_vsyscall = 0;    /* TBD */
 
        return 0;
 }
index 237d77ac83c923e696789755ef0a5c04111bb0f5..ff8fdcafb834ad0f137ebdfccf6b45aeba9af65a 100644 (file)
@@ -138,12 +138,13 @@ HYPERVISOR_set_callbacks(
 
 static inline int
 HYPERVISOR_fpu_taskswitch(
-    void)
+    int set)
 {
     int ret;
     __asm__ __volatile__ (
         TRAP_INSTR
-        : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch) : __syscall_clobber );
+        : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
+          "D" ((unsigned long) set) : __syscall_clobber );
 
     return ret;
 }
index bffa9208c83d83670414757be7b8ad745dbcbc53..7c70eac4259123cc336d816069db86162cabf6c9 100644 (file)
@@ -144,8 +144,7 @@ struct alt_instr {
 /*
  * Clear and set 'TS' bit respectively
  */
-#define clts() __asm__ __volatile__ ("clts")
-
+#define clts() (HYPERVISOR_fpu_taskswitch(0))
 static inline unsigned long read_cr0(void)
 { 
         BUG();
@@ -170,8 +169,7 @@ static inline void write_cr4(unsigned long val)
 { 
         BUG();
 } 
-
-#define stts() write_cr0(8 | read_cr0())
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
 
 #define wbinvd() \
        __asm__ __volatile__ ("wbinvd": : :"memory");